# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
Dimensionality reduction using PCA, followed by linear regression models to predict the performance of an antenna.
Data Analysis¶
Data Analysis on The Input Data¶
# Data loading: 385 antenna designs x 11 geometric/material input parameters
data = pd.read_csv('hw1_input.csv')
data
| length of patch | width of patch | height of patch | height of substrate | height of solder resist layer | radius of the probe | c_pad | c_antipad | c_probe | dielectric constant of substrate | dielectric constant of solder resist layer | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3.202024 | 2.288742 | 0.012514 | 0.139247 | 0.041757 | 0.028566 | 0.000549 | 0.032403 | 0.348140 | 3.735926 | 4.278575 |
| 1 | 2.107848 | 2.895504 | 0.037171 | 0.149492 | 0.056775 | 0.028930 | 0.005536 | 0.053647 | 0.326369 | 4.929862 | 4.876068 |
| 2 | 3.252113 | 4.818411 | 0.025432 | 0.578834 | 0.029972 | 0.030922 | 0.020274 | 0.049845 | 0.446639 | 4.772670 | 4.745106 |
| 3 | 4.161509 | 2.294309 | 0.011058 | 0.117266 | 0.093223 | 0.017604 | 0.001135 | 0.098610 | 0.055665 | 4.102438 | 3.755671 |
| 4 | 4.820912 | 2.948325 | 0.019658 | 0.163503 | 0.094337 | 0.025757 | 0.021725 | 0.072813 | 0.272282 | 2.531031 | 3.047553 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 380 | 3.950131 | 5.165856 | 0.034389 | 0.770787 | 0.095579 | 0.025591 | 0.011430 | 0.048502 | 0.241219 | 4.099788 | 4.635040 |
| 381 | 2.676373 | 2.826965 | 0.011377 | 0.106795 | 0.055925 | 0.042376 | 0.005102 | 0.060532 | 0.294000 | 4.758303 | 4.205687 |
| 382 | 4.406785 | 1.814310 | 0.011187 | 0.146086 | 0.049041 | 0.040626 | 0.022849 | 0.033737 | 0.225794 | 2.826602 | 4.908306 |
| 383 | 2.540721 | 3.310983 | 0.015000 | 0.135650 | 0.082388 | 0.043490 | 0.011130 | 0.033451 | 0.370502 | 3.303852 | 4.764701 |
| 384 | 2.425315 | 4.998480 | 0.029217 | 0.605634 | 0.028654 | 0.037827 | 0.005896 | 0.033075 | 0.215161 | 4.354493 | 2.676568 |
385 rows × 11 columns
We are dealing with a dataset of 385 instances and 11 numeric attributes whose ranges are given.
# Summary statistics: count/mean/std/quartiles for every input feature
data.describe()
| length of patch | width of patch | height of patch | height of substrate | height of solder resist layer | radius of the probe | c_pad | c_antipad | c_probe | dielectric constant of substrate | dielectric constant of solder resist layer | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 385.000000 | 385.000000 | 385.000000 | 385.000000 | 385.000000 | 385.000000 | 385.000000 | 385.000000 | 385.000000 | 385.000000 | 385.000000 |
| mean | 3.569210 | 3.536983 | 0.024273 | 0.347643 | 0.060065 | 0.032198 | 0.012797 | 0.060648 | 0.245586 | 3.704384 | 3.521911 |
| std | 0.966173 | 1.182100 | 0.008800 | 0.272738 | 0.023670 | 0.010352 | 0.007111 | 0.021503 | 0.111245 | 0.853877 | 0.871233 |
| min | 1.805658 | 1.801273 | 0.010008 | 0.100321 | 0.020039 | 0.015012 | 0.000003 | 0.025292 | 0.050810 | 2.023380 | 2.001679 |
| 25% | 2.755534 | 2.501163 | 0.016194 | 0.126901 | 0.038689 | 0.023389 | 0.006985 | 0.042011 | 0.148565 | 2.998152 | 2.783710 |
| 50% | 3.637716 | 3.215396 | 0.024198 | 0.155254 | 0.060764 | 0.030979 | 0.012454 | 0.060532 | 0.245049 | 3.866295 | 3.480916 |
| 75% | 4.369311 | 4.829731 | 0.031688 | 0.649324 | 0.080247 | 0.041819 | 0.019014 | 0.078227 | 0.340203 | 4.375551 | 4.278575 |
| max | 5.199919 | 5.198689 | 0.039843 | 0.799082 | 0.099728 | 0.049960 | 0.024996 | 0.099945 | 0.449599 | 4.999324 | 4.999950 |
# Pearson correlation matrix between the input features
data.corr()
| length of patch | width of patch | height of patch | height of substrate | height of solder resist layer | radius of the probe | c_pad | c_antipad | c_probe | dielectric constant of substrate | dielectric constant of solder resist layer | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| length of patch | 1.000000 | -0.114174 | -0.026032 | -0.064344 | 0.044502 | -0.069905 | -0.005560 | -0.009627 | 0.081735 | -0.037448 | -0.014496 |
| width of patch | -0.114174 | 1.000000 | 0.091726 | 0.923739 | -0.021056 | 0.035675 | -0.044198 | -0.013358 | 0.029999 | 0.442193 | 0.038746 |
| height of patch | -0.026032 | 0.091726 | 1.000000 | 0.082833 | 0.036045 | 0.030689 | 0.002422 | 0.037583 | 0.031592 | -0.044318 | -0.037769 |
| height of substrate | -0.064344 | 0.923739 | 0.082833 | 1.000000 | -0.011870 | 0.017410 | -0.031571 | -0.009432 | 0.044166 | 0.459847 | 0.002421 |
| height of solder resist layer | 0.044502 | -0.021056 | 0.036045 | -0.011870 | 1.000000 | -0.012756 | -0.018044 | -0.017071 | 0.024842 | -0.010967 | -0.038405 |
| radius of the probe | -0.069905 | 0.035675 | 0.030689 | 0.017410 | -0.012756 | 1.000000 | 0.002906 | -0.000523 | 0.048088 | -0.025728 | 0.007524 |
| c_pad | -0.005560 | -0.044198 | 0.002422 | -0.031571 | -0.018044 | 0.002906 | 1.000000 | 0.067678 | -0.015500 | -0.049318 | 0.015284 |
| c_antipad | -0.009627 | -0.013358 | 0.037583 | -0.009432 | -0.017071 | -0.000523 | 0.067678 | 1.000000 | -0.132321 | -0.016741 | 0.067821 |
| c_probe | 0.081735 | 0.029999 | 0.031592 | 0.044166 | 0.024842 | 0.048088 | -0.015500 | -0.132321 | 1.000000 | 0.015640 | -0.029101 |
| dielectric constant of substrate | -0.037448 | 0.442193 | -0.044318 | 0.459847 | -0.010967 | -0.025728 | -0.049318 | -0.016741 | 0.015640 | 1.000000 | 0.060402 |
| dielectric constant of solder resist layer | -0.014496 | 0.038746 | -0.037769 | 0.002421 | -0.038405 | 0.007524 | 0.015284 | 0.067821 | -0.029101 | 0.060402 | 1.000000 |
# Pairwise scatter plots of all input features (diagonals: univariate distributions)
sns.pairplot(data)
/Users/eren/micromamba/envs/Py_ML_DS_env/lib/python3.9/site-packages/seaborn/axisgrid.py:64: UserWarning: The figure layout has changed to tight self.fig.tight_layout(*args, **kwargs)
<seaborn.axisgrid.PairGrid at 0x28172ac70>
# Heatmap of the feature correlation matrix
sns.heatmap(data.corr())
<Axes: >
There is a high correlation between the height of substrate and the width of patch. Let's have a closer look for better understanding for the relationship of these two features.
# Scatter of the two highly correlated features.
# Fixed: seaborn >= 0.12 rejects positional x/y arguments (this call previously
# emitted a FutureWarning); pass them as keywords instead.
sns.scatterplot(x=data['height of substrate'], y=data['width of patch'])
/Users/eren/micromamba/envs/Py_ML_DS_env/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
<Axes: xlabel='height of substrate', ylabel='width of patch'>
Here we have two highly correlated parameters. The best approach would be manual feature engineering, deriving meaningful combinations based on our knowledge of the data; alternatively, in such cases one of these features can simply be eliminated.
Since we don't have too many correlated features we may want a general reduction method that transforms features while retaining variance which is PCA.
Data Analysis on The Target Data¶
# Load the target data: 201 frequency samples per design
real_S11 = pd.read_csv('hw1_real.csv', header=None) # Real part of S11
imag_S11 = pd.read_csv('hw1_img.csv', header=None) # Imaginary part of S11
# Drop the first row and renumber -- presumably a non-design row (frequency
# axis or header); TODO confirm against the raw CSV
real_S11[1::].reset_index(drop=True)
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.649471 | 0.608623 | 0.563770 | 0.515485 | 0.464365 | 0.411022 | 0.356056 | 0.300044 | 0.243525 | 0.186992 | ... | -0.895451 | -0.894250 | -0.893045 | -0.891835 | -0.890620 | -0.889402 | -0.888178 | -0.886951 | -0.885718 | -0.884482 |
| 1 | -0.684443 | -0.680926 | -0.677355 | -0.673727 | -0.670041 | -0.666296 | -0.662491 | -0.658624 | -0.654692 | -0.650696 | ... | -0.835303 | -0.832276 | -0.829209 | -0.826105 | -0.822965 | -0.819791 | -0.816584 | -0.813346 | -0.810078 | -0.806782 |
| 2 | 0.670194 | 0.678183 | 0.685886 | 0.693294 | 0.700399 | 0.707191 | 0.713661 | 0.719799 | 0.725595 | 0.731037 | ... | 0.693171 | 0.675653 | 0.656992 | 0.637113 | 0.615936 | 0.593384 | 0.569383 | 0.543870 | 0.516796 | 0.488139 |
| 3 | -0.856026 | -0.854962 | -0.853892 | -0.852815 | -0.851731 | -0.850642 | -0.849545 | -0.848442 | -0.847333 | -0.846217 | ... | -0.115205 | -0.096197 | -0.076456 | -0.055953 | -0.034660 | -0.012550 | 0.010402 | 0.034219 | 0.058921 | 0.084521 |
| 4 | -0.952876 | -0.951901 | -0.950906 | -0.949892 | -0.948860 | -0.947811 | -0.946747 | -0.945668 | -0.944575 | -0.943469 | ... | -0.692349 | -0.690720 | -0.689088 | -0.687453 | -0.685813 | -0.684171 | -0.682524 | -0.680874 | -0.679221 | -0.677564 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 380 | 0.712664 | 0.703017 | 0.692973 | 0.682540 | 0.671727 | 0.660545 | 0.649007 | 0.637130 | 0.624930 | 0.612431 | ... | 0.548479 | 0.560256 | 0.571626 | 0.582598 | 0.593180 | 0.603380 | 0.613208 | 0.622674 | 0.631789 | 0.640562 |
| 381 | -0.304243 | -0.267997 | -0.229050 | -0.187238 | -0.142418 | -0.094486 | -0.043392 | 0.010828 | 0.068016 | 0.127852 | ... | -0.898600 | -0.897598 | -0.896590 | -0.895579 | -0.894562 | -0.893540 | -0.892514 | -0.891483 | -0.890447 | -0.889406 |
| 382 | -0.985887 | -0.986109 | -0.986290 | -0.986434 | -0.986540 | -0.986612 | -0.986650 | -0.986656 | -0.986631 | -0.986577 | ... | -0.831635 | -0.830253 | -0.828863 | -0.827464 | -0.826056 | -0.824640 | -0.823215 | -0.821781 | -0.820338 | -0.818886 |
| 383 | -0.696270 | -0.692488 | -0.688645 | -0.684740 | -0.680771 | -0.676737 | -0.672637 | -0.668469 | -0.664232 | -0.659923 | ... | -0.906488 | -0.906808 | -0.906959 | -0.906948 | -0.906784 | -0.906473 | -0.906022 | -0.905439 | -0.904728 | -0.903896 |
| 384 | 0.689258 | 0.681693 | 0.674312 | 0.667233 | 0.660581 | 0.654481 | 0.649055 | 0.644419 | 0.640675 | 0.637908 | ... | 0.536188 | 0.546594 | 0.556813 | 0.566843 | 0.576688 | 0.586347 | 0.595821 | 0.605112 | 0.614219 | 0.623143 |
385 rows × 201 columns
Relationship With The Input Data¶
Let's focus on a single frequency point 100 first.
# Join the inputs with the real part of S11 at frequency index 100
# (row 0 of real_S11 dropped and rows renumbered to align with `data`)
df = pd.merge(data, real_S11[1::].reset_index(drop=True)[[100]], left_index=True, right_index=True)
df
| length of patch | width of patch | height of patch | height of substrate | height of solder resist layer | radius of the probe | c_pad | c_antipad | c_probe | dielectric constant of substrate | dielectric constant of solder resist layer | 100 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3.202024 | 2.288742 | 0.012514 | 0.139247 | 0.041757 | 0.028566 | 0.000549 | 0.032403 | 0.348140 | 3.735926 | 4.278575 | -0.975209 |
| 1 | 2.107848 | 2.895504 | 0.037171 | 0.149492 | 0.056775 | 0.028930 | 0.005536 | 0.053647 | 0.326369 | 4.929862 | 4.876068 | 0.522707 |
| 2 | 3.252113 | 4.818411 | 0.025432 | 0.578834 | 0.029972 | 0.030922 | 0.020274 | 0.049845 | 0.446639 | 4.772670 | 4.745106 | 0.386045 |
| 3 | 4.161509 | 2.294309 | 0.011058 | 0.117266 | 0.093223 | 0.017604 | 0.001135 | 0.098610 | 0.055665 | 4.102438 | 3.755671 | -0.702926 |
| 4 | 4.820912 | 2.948325 | 0.019658 | 0.163503 | 0.094337 | 0.025757 | 0.021725 | 0.072813 | 0.272282 | 2.531031 | 3.047553 | -0.826891 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 380 | 3.950131 | 5.165856 | 0.034389 | 0.770787 | 0.095579 | 0.025591 | 0.011430 | 0.048502 | 0.241219 | 4.099788 | 4.635040 | 0.743482 |
| 381 | 2.676373 | 2.826965 | 0.011377 | 0.106795 | 0.055925 | 0.042376 | 0.005102 | 0.060532 | 0.294000 | 4.758303 | 4.205687 | -0.970511 |
| 382 | 4.406785 | 1.814310 | 0.011187 | 0.146086 | 0.049041 | 0.040626 | 0.022849 | 0.033737 | 0.225794 | 2.826602 | 4.908306 | -0.930577 |
| 383 | 2.540721 | 3.310983 | 0.015000 | 0.135650 | 0.082388 | 0.043490 | 0.011130 | 0.033451 | 0.370502 | 3.303852 | 4.764701 | 0.470467 |
| 384 | 2.425315 | 4.998480 | 0.029217 | 0.605634 | 0.028654 | 0.037827 | 0.005896 | 0.033075 | 0.215161 | 4.354493 | 2.676568 | -0.155731 |
385 rows × 12 columns
# Relationships between each input feature (x) and the real part of S11 at
# frequency index 100 (y)
sns.pairplot(df, y_vars=[100])
/Users/eren/micromamba/envs/Py_ML_DS_env/lib/python3.9/site-packages/seaborn/axisgrid.py:64: UserWarning: The figure layout has changed to tight self.fig.tight_layout(*args, **kwargs)
<seaborn.axisgrid.PairGrid at 0x2d5d443d0>
# Correlation of every column with the real part at frequency index 100
df.corr()[100]
length of patch -0.027677 width of patch 0.830605 height of patch 0.071386 height of substrate 0.880056 height of solder resist layer -0.007944 radius of the probe -0.035128 c_pad -0.040812 c_antipad -0.028853 c_probe 0.110294 dielectric constant of substrate 0.377078 dielectric constant of solder resist layer 0.033185 100 1.000000 Name: 100, dtype: float64
# Scatter of the target (real part at frequency 100) against width of patch,
# the feature with the second-highest correlation above
plt.scatter(df['width of patch'], df[100])
<matplotlib.collections.PathCollection at 0x33ce52760>
Even though they look correlated, we do not observe a linear relationship.
$\text{Magnitude}(S_{11}) = \sqrt{\text{Real Part}^2 + \text{Imaginary Part}^2}$
# Magnitude of S11: elementwise sqrt(Re^2 + Im^2) over all designs/frequencies
S11_magnitude = np.sqrt(real_S11 ** 2 + imag_S11 ** 2)
# Drop the first row and renumber designs from 0, mirroring the slicing used
# for the real/imaginary frames
S11_target = S11_magnitude.iloc[1:].reset_index(drop=True)
S11_target
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.781778 | 0.783440 | 0.785795 | 0.788790 | 0.792359 | 0.796433 | 0.800936 | 0.805793 | 0.810930 | 0.816277 | ... | 0.988520 | 0.988551 | 0.988581 | 0.988610 | 0.988638 | 0.988664 | 0.988690 | 0.988714 | 0.988737 | 0.988759 |
| 1 | 0.986860 | 0.986669 | 0.986470 | 0.986263 | 0.986048 | 0.985824 | 0.985592 | 0.985350 | 0.985100 | 0.984839 | ... | 0.945061 | 0.945880 | 0.946669 | 0.947428 | 0.948158 | 0.948861 | 0.949537 | 0.950188 | 0.950814 | 0.951417 |
| 2 | 0.866883 | 0.865643 | 0.864258 | 0.862724 | 0.861039 | 0.859198 | 0.857199 | 0.855036 | 0.852705 | 0.850202 | ... | 0.865665 | 0.861328 | 0.856551 | 0.851268 | 0.845404 | 0.838870 | 0.831564 | 0.823368 | 0.814148 | 0.803752 |
| 3 | 0.995069 | 0.995055 | 0.995041 | 0.995025 | 0.995009 | 0.994991 | 0.994973 | 0.994953 | 0.994933 | 0.994912 | ... | 0.935682 | 0.932755 | 0.929637 | 0.926312 | 0.922765 | 0.918978 | 0.914932 | 0.910607 | 0.905982 | 0.901033 |
| 4 | 0.985009 | 0.985235 | 0.985447 | 0.985645 | 0.985831 | 0.986005 | 0.986169 | 0.986321 | 0.986464 | 0.986597 | ... | 0.988472 | 0.988418 | 0.988361 | 0.988304 | 0.988244 | 0.988183 | 0.988121 | 0.988057 | 0.987991 | 0.987924 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 380 | 0.712967 | 0.703130 | 0.692989 | 0.682545 | 0.671797 | 0.660746 | 0.649395 | 0.637749 | 0.625814 | 0.613598 | ... | 0.675654 | 0.683714 | 0.691363 | 0.698625 | 0.705520 | 0.712067 | 0.718286 | 0.724194 | 0.729808 | 0.735143 |
| 381 | 0.901571 | 0.894376 | 0.886417 | 0.877604 | 0.867842 | 0.857028 | 0.845054 | 0.831812 | 0.817200 | 0.801129 | ... | 0.990712 | 0.990714 | 0.990716 | 0.990716 | 0.990715 | 0.990713 | 0.990710 | 0.990706 | 0.990701 | 0.990695 |
| 382 | 0.985925 | 0.986114 | 0.986294 | 0.986465 | 0.986629 | 0.986785 | 0.986934 | 0.987076 | 0.987212 | 0.987341 | ... | 0.987993 | 0.987889 | 0.987783 | 0.987674 | 0.987561 | 0.987445 | 0.987326 | 0.987204 | 0.987078 | 0.986949 |
| 383 | 0.984282 | 0.984049 | 0.983807 | 0.983558 | 0.983300 | 0.983034 | 0.982758 | 0.982473 | 0.982179 | 0.981875 | ... | 0.911483 | 0.913052 | 0.914566 | 0.916027 | 0.917439 | 0.918802 | 0.920118 | 0.921390 | 0.922618 | 0.923805 |
| 384 | 0.710091 | 0.701981 | 0.694257 | 0.687033 | 0.680424 | 0.674545 | 0.669505 | 0.665398 | 0.662302 | 0.660269 | ... | 0.818936 | 0.821178 | 0.823357 | 0.825474 | 0.827530 | 0.829524 | 0.831458 | 0.833330 | 0.835142 | 0.836893 |
385 rows × 201 columns
Now, let us focus on magnitude at a single frequency point.
# Attach the magnitude at frequency index 100 to the input features by row index
df = pd.merge(data, S11_target[[100]], left_index=True, right_index=True)
df
| length of patch | width of patch | height of patch | height of substrate | height of solder resist layer | radius of the probe | c_pad | c_antipad | c_probe | dielectric constant of substrate | dielectric constant of solder resist layer | 100 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3.202024 | 2.288742 | 0.012514 | 0.139247 | 0.041757 | 0.028566 | 0.000549 | 0.032403 | 0.348140 | 3.735926 | 4.278575 | 0.978879 |
| 1 | 2.107848 | 2.895504 | 0.037171 | 0.149492 | 0.056775 | 0.028930 | 0.005536 | 0.053647 | 0.326369 | 4.929862 | 4.876068 | 0.649184 |
| 2 | 3.252113 | 4.818411 | 0.025432 | 0.578834 | 0.029972 | 0.030922 | 0.020274 | 0.049845 | 0.446639 | 4.772670 | 4.745106 | 0.754555 |
| 3 | 4.161509 | 2.294309 | 0.011058 | 0.117266 | 0.093223 | 0.017604 | 0.001135 | 0.098610 | 0.055665 | 4.102438 | 3.755671 | 0.990385 |
| 4 | 4.820912 | 2.948325 | 0.019658 | 0.163503 | 0.094337 | 0.025757 | 0.021725 | 0.072813 | 0.272282 | 2.531031 | 3.047553 | 0.989076 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 380 | 3.950131 | 5.165856 | 0.034389 | 0.770787 | 0.095579 | 0.025591 | 0.011430 | 0.048502 | 0.241219 | 4.099788 | 4.635040 | 0.779355 |
| 381 | 2.676373 | 2.826965 | 0.011377 | 0.106795 | 0.055925 | 0.042376 | 0.005102 | 0.060532 | 0.294000 | 4.758303 | 4.205687 | 0.982447 |
| 382 | 4.406785 | 1.814310 | 0.011187 | 0.146086 | 0.049041 | 0.040626 | 0.022849 | 0.033737 | 0.225794 | 2.826602 | 4.908306 | 0.990335 |
| 383 | 2.540721 | 3.310983 | 0.015000 | 0.135650 | 0.082388 | 0.043490 | 0.011130 | 0.033451 | 0.370502 | 3.303852 | 4.764701 | 0.757697 |
| 384 | 2.425315 | 4.998480 | 0.029217 | 0.605634 | 0.028654 | 0.037827 | 0.005896 | 0.033075 | 0.215161 | 4.354493 | 2.676568 | 0.435774 |
385 rows × 12 columns
# Relationships between the features and the magnitude at a single frequency
# point (index 100)
sns.pairplot(df, y_vars=[100])
/Users/eren/micromamba/envs/Py_ML_DS_env/lib/python3.9/site-packages/seaborn/axisgrid.py:64: UserWarning: The figure layout has changed to tight self.fig.tight_layout(*args, **kwargs)
<seaborn.axisgrid.PairGrid at 0x2d62b3d60>
Using a single frequency point approach doesn't really give us anything useful.
Let us examine the minimum extreme points.
# Plot the magnitude for the first 12 designs
fig, ax = plt.subplots(figsize=(10, 6))
# Customize the plot
ax.set_title('S11 Magnitudes for the First 12 Designs Across Frequency Points')
ax.set_xlabel('Frequency Index')
ax.set_ylabel('S11 Magnitude')
ax.grid(True)
# Add a caption below the plot
plt.figtext(0.5, -0.05, "S11 magnitude values showing antenna reflection performance over 201 frequency points and the absolute minimum points for each design where it has the strongest reflection performance. ",
            ha="center", fontsize=10, wrap=True)
# NOTE(review): rows 1..12 are plotted; row 0 of S11_magnitude is skipped here
# (elsewhere the notebook slices it off with [1::]) -- presumably a non-design
# row; confirm against the raw CSV.
for i in range(1, 13): # First 12 designs
    plt.plot(S11_magnitude.iloc[i, :], label=f"Design {i}")
    # Mark each curve's global minimum (strongest resonance) with its
    # (frequency index, magnitude) coordinates
    min_value = np.min(S11_magnitude.iloc[i, :])
    min_index = np.argmin(S11_magnitude.iloc[i, :])
    plt.text(min_index, min_value, f'x ({min_index}, {min_value:.5f})', fontsize=8, ha='left')
ax.legend(loc="best", fontsize=10)
<matplotlib.legend.Legend at 0x369972a90>
# Locate the strongest resonance (global minimum of the magnitude curve) for
# the design stored in row 3
design_curve = S11_magnitude.iloc[3, :].to_numpy()
min_value = design_curve.min()
min_index = design_curve.argmin()
print(f"Minimum value: {min_value} at index {min_index}")
Minimum value: 0.02148968319005195 at index 67
# Plot magnitude, real and imaginary parts for the design in row 3
fig, ax = plt.subplots(figsize=(10, 6))
# Customize the plot
ax.set_title('3th Design')
ax.set_xlabel('Frequency Index')
ax.grid(True)
# Add a caption below the plot
plt.figtext(0.5, -0.05, "For example, the green design shows the S11 value approaching zero (0.02148968319005195) around the frequency at index 70 (67). Smaller magnitudes indicate less signal loss, meaning a stronger reflection performance. In the case of S11, a smaller magnitude implies that the signal can be received more effectively around that frequency.",
            ha="center", fontsize=10, wrap=True)
# Annotate the resonance point (global minimum of the magnitude curve)
min_value = np.min(S11_magnitude.iloc[3, :])
min_index = np.argmin(S11_magnitude.iloc[3, :])
plt.text(min_index, min_value, f'x ({min_index}, {min_value:.5f})', fontsize=8, ha='left')
plt.plot(S11_magnitude.iloc[3, :], label="Magnitude", c='g', lw=2.5)
# Real/imag curves use the same raw-row indexing as S11_magnitude, so all
# three lines refer to the same design
plt.plot(real_S11.iloc[3, :], label="Real Part", c='r',ls=':', lw=1.5)
plt.plot(imag_S11.iloc[3, :], label="Imaginary Part", c='b',ls=':',lw=1.5)
ax.legend(loc="best", fontsize=10)
<matplotlib.legend.Legend at 0x297579c70>
Resonance Indices for The Target Data¶
indices=[]
# Plot all designs' critical frequencies where antenna's performance is the best
fig, ax = plt.subplots(figsize=(10, 6))
# Customize the plot
ax.set_title('Frequency Values and S11 Magnitudes For Each Design Where It Has The Strongest Reflection Performance.')
ax.set_xlabel('Frequency Index')
ax.set_ylabel('S11 Magnitude')
ax.grid(True)
# Add a caption below the plot
plt.figtext(0.5, -0.05, "Showing the frequency points for each design where it has the strongest reflection performance. ",
            ha="center", fontsize=10, wrap=True)
# NOTE(review): starts at row 1 (row 0 presumably not a design row), so
# `indices` collects len(S11_magnitude) - 1 resonance indices -- confirm the
# intended design count.
for i in range(1, len(S11_magnitude)):
    # Resonance of this design: frequency index and value of the row minimum
    min_value = np.min(S11_magnitude.iloc[i, :])
    min_index = np.argmin(S11_magnitude.iloc[i, :])
    plt.scatter(min_index, min_value, c='b', s=0.9)
    plt.text(min_index, min_value, f'x ({min_index}, {min_value:.5f})', fontsize=3, ha='left')
    indices.append(min_index)
Selecting frequency points 0 and 200 as resonance frequencies, we can significantly reduce the complexity of the regression task. This approach allows us to predict the antenna's performance at the most critical points, rather than trying to model the entire frequency range.
# Histogram of resonance indices across all designs (full 0-200 range)
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_xlabel('Frequency Index')
ax.set_ylabel('Counts')
plt.hist(indices, bins=40);
As an additional resonance point, frequency index 100 can be selected, since it has the third-largest number of designs whose peak performance occurs at that frequency.
# Omit the frequency points 0-10 and 190-200 to inspect the mid-band peaks
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_xlabel('Frequency Index')
ax.set_ylabel('Counts')
plt.hist(indices, bins=10, range=(10, 190));
# Key frequency indices chosen from the resonance histograms above:
# the band edges plus the mid-band peak at index 100
key_frequencies = [0, 100, 200]
Resonance Values As The Target¶
values=[]
values_real=[]
values_imag=[]
# Row-wise minima of magnitude / real / imaginary parts, used as regression
# targets.
# NOTE(review): `values` indexes the un-sliced S11_magnitude (which still
# contains the extra first row dropped elsewhere via [1::]), while
# `values_real`/`values_imag` index the [1::]-sliced frames -- the two
# indexings are offset by one raw row, and the loop bound len(S11_magnitude)
# exceeds the sliced frames' length. TODO confirm the intended alignment
# (likely all three should iterate over the same [1::]-sliced frames).
# NOTE(review): resonance_real/resonance_imag are the minima of the real and
# imaginary parts themselves, not the real/imag values at the frequency where
# the magnitude is minimal -- verify this is the intended target definition.
for i in range(0, len(S11_magnitude)):
    min_value = np.min(S11_magnitude.iloc[i, :])
    values.append(min_value)
    min_value = np.min(real_S11[1::].reset_index(drop=True).iloc[i, :])
    values_real.append(min_value)
    min_value = np.min(imag_S11[1::].reset_index(drop=True).iloc[i, :])
    values_imag.append(min_value)
values_df=pd.DataFrame(values, columns=["resonance"])
values_real_df=pd.DataFrame(values_real, columns=["resonance_real"])
values_imag_df=pd.DataFrame(values_imag, columns=["resonance_imag"])
#### Adding Resonance Values As The Target
# Attach the three resonance targets to the input features, aligning by row index
df = (
    data
    .merge(values_df, left_index=True, right_index=True)
    .merge(values_real_df, left_index=True, right_index=True)
    .merge(values_imag_df, left_index=True, right_index=True)
)
df
| length of patch | width of patch | height of patch | height of substrate | height of solder resist layer | radius of the probe | c_pad | c_antipad | c_probe | dielectric constant of substrate | dielectric constant of solder resist layer | resonance | resonance_real | resonance_imag | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3.202024 | 2.288742 | 0.012514 | 0.139247 | 0.041757 | 0.028566 | 0.000549 | 0.032403 | 0.348140 | 3.735926 | 4.278575 | 0.781778 | -0.976053 | -0.840042 |
| 1 | 2.107848 | 2.895504 | 0.037171 | 0.149492 | 0.056775 | 0.028930 | 0.005536 | 0.053647 | 0.326369 | 4.929862 | 4.876068 | 0.476733 | -0.887572 | -0.544274 |
| 2 | 3.252113 | 4.818411 | 0.025432 | 0.578834 | 0.029972 | 0.030922 | 0.020274 | 0.049845 | 0.446639 | 4.772670 | 4.745106 | 0.021490 | -0.078516 | -0.638543 |
| 3 | 4.161509 | 2.294309 | 0.011058 | 0.117266 | 0.093223 | 0.017604 | 0.001135 | 0.098610 | 0.055665 | 4.102438 | 3.755671 | 0.901033 | -0.856026 | 0.507327 |
| 4 | 4.820912 | 2.948325 | 0.019658 | 0.163503 | 0.094337 | 0.025757 | 0.021725 | 0.072813 | 0.272282 | 2.531031 | 3.047553 | 0.985009 | -0.952876 | 0.249540 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 380 | 3.950131 | 5.165856 | 0.034389 | 0.770787 | 0.095579 | 0.025591 | 0.011430 | 0.048502 | 0.241219 | 4.099788 | 4.635040 | 0.167674 | 0.159942 | -0.231896 |
| 381 | 2.676373 | 2.826965 | 0.011377 | 0.106795 | 0.055925 | 0.042376 | 0.005102 | 0.060532 | 0.294000 | 4.758303 | 4.205687 | 0.581230 | -0.974088 | -0.708839 |
| 382 | 4.406785 | 1.814310 | 0.011187 | 0.146086 | 0.049041 | 0.040626 | 0.022849 | 0.033737 | 0.225794 | 2.826602 | 4.908306 | 0.985925 | -0.986656 | -0.008603 |
| 383 | 2.540721 | 3.310983 | 0.015000 | 0.135650 | 0.082388 | 0.043490 | 0.011130 | 0.033451 | 0.370502 | 3.303852 | 4.764701 | 0.573305 | -0.906959 | -0.637722 |
| 384 | 2.425315 | 4.998480 | 0.029217 | 0.605634 | 0.028654 | 0.037827 | 0.005896 | 0.033075 | 0.215161 | 4.354493 | 2.676568 | 0.398853 | -0.421003 | -0.510269 |
385 rows × 14 columns
# Correlation of every column with the magnitude-resonance target
df.corr()['resonance']
length of patch 0.088859 width of patch -0.719391 height of patch -0.057423 height of substrate -0.727314 height of solder resist layer 0.006547 radius of the probe 0.016414 c_pad 0.076387 c_antipad 0.018615 c_probe 0.044486 dielectric constant of substrate -0.498250 dielectric constant of solder resist layer -0.107778 resonance 1.000000 resonance_real -0.698963 resonance_imag 0.411778 Name: resonance, dtype: float64
# Relationships between the features and the magnitude-resonance values
sns.pairplot(df, y_vars=['resonance'])
/Users/eren/micromamba/envs/Py_ML_DS_env/lib/python3.9/site-packages/seaborn/axisgrid.py:64: UserWarning: The figure layout has changed to tight self.fig.tight_layout(*args, **kwargs)
<seaborn.axisgrid.PairGrid at 0x2d98ee670>
# Correlation of every column with the real-part resonance target
df.corr()['resonance_real']
length of patch 0.009014 width of patch 0.894428 height of patch 0.074876 height of substrate 0.960081 height of solder resist layer 0.004733 radius of the probe -0.049564 c_pad -0.028054 c_antipad 0.000707 c_probe 0.007350 dielectric constant of substrate 0.401108 dielectric constant of solder resist layer 0.012930 resonance -0.698963 resonance_real 1.000000 resonance_imag -0.187013 Name: resonance_real, dtype: float64
# Relationships between the features and the resonance values (real part)
sns.pairplot(df, y_vars=['resonance_real'])
/Users/eren/micromamba/envs/Py_ML_DS_env/lib/python3.9/site-packages/seaborn/axisgrid.py:64: UserWarning: The figure layout has changed to tight self.fig.tight_layout(*args, **kwargs)
<seaborn.axisgrid.PairGrid at 0x2d98dd4c0>
# Correlation of every column with the imaginary-part resonance target
df.corr()['resonance_imag']
length of patch 0.057338 width of patch -0.197444 height of patch 0.065604 height of substrate -0.267361 height of solder resist layer 0.023461 radius of the probe -0.139563 c_pad 0.081723 c_antipad 0.152817 c_probe -0.391245 dielectric constant of substrate -0.203338 dielectric constant of solder resist layer -0.009696 resonance 0.411778 resonance_real -0.187013 resonance_imag 1.000000 Name: resonance_imag, dtype: float64
# Relationships between the features and the resonance values (imaginary part)
sns.pairplot(df, y_vars=['resonance_imag'])
/Users/eren/micromamba/envs/Py_ML_DS_env/lib/python3.9/site-packages/seaborn/axisgrid.py:64: UserWarning: The figure layout has changed to tight self.fig.tight_layout(*args, **kwargs)
<seaborn.axisgrid.PairGrid at 0x2d9f05d90>
PCA with Scikit-Learn¶
## Scikit-Learn Implementations
# Standardization + PCA utilities
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
Principle Component Analysis on The Input Data¶
# Standardize features to zero mean / unit variance so PCA is not dominated by
# the differently-scaled inputs (e.g. patch length ~5 vs c_pad ~0.01)
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)
# Fit PCA keeping all 11 components (no reduction yet)
pca = PCA()
principal_components = pca.fit_transform(scaled_data)
# Pairplot for the 11 principal components (should show no pairwise correlation,
# since components are orthogonal by construction)
pc_df = pd.DataFrame(principal_components)
sns.pairplot(pc_df)
/Users/eren/micromamba/envs/Py_ML_DS_env/lib/python3.9/site-packages/seaborn/axisgrid.py:64: UserWarning: The figure layout has changed to tight self.fig.tight_layout(*args, **kwargs)
<seaborn.axisgrid.PairGrid at 0x2d3993d60>
# Explained variance ratio per principal component
pca.explained_variance_ratio_
array([0.20715348, 0.11070825, 0.10003384, 0.09607237, 0.09187079,
0.08845826, 0.08565904, 0.08165053, 0.07339863, 0.05830268,
0.00669213])
# Cumulative sum of the explained variance ratio
cum_explained_variance= pca.explained_variance_ratio_.cumsum()
cum_explained_variance
array([0.20715348, 0.31786173, 0.41789557, 0.51396794, 0.60583873,
0.69429699, 0.77995603, 0.86160656, 0.93500519, 0.99330787,
1. ])
# Tabulate per-component and cumulative explained variance, labelled Comp.1..11
pd.DataFrame(
    {
        "Explained Variance Ratio": pca.explained_variance_ratio_,
        "Cumulative Explained Variance Ratio": cum_explained_variance,
    },
    index=[f"Comp.{k}" for k in range(1, len(pc_df.columns) + 1)],
)
| Explained Variance Ratio | Cumulative Explained Variance Ratio | |
|---|---|---|
| Comp.1 | 0.207153 | 0.207153 |
| Comp.2 | 0.110708 | 0.317862 |
| Comp.3 | 0.100034 | 0.417896 |
| Comp.4 | 0.096072 | 0.513968 |
| Comp.5 | 0.091871 | 0.605839 |
| Comp.6 | 0.088458 | 0.694297 |
| Comp.7 | 0.085659 | 0.779956 |
| Comp.8 | 0.081651 | 0.861607 |
| Comp.9 | 0.073399 | 0.935005 |
| Comp.10 | 0.058303 | 0.993308 |
| Comp.11 | 0.006692 | 1.000000 |
Observe that, when we take only the first eight principal components we lose more than 10% of explained variance during dimensionality reduction.
#Plot the cumulative sum of the explained variance ratio
plt.figure(figsize=(8, 6))
plt.plot(cum_explained_variance, marker='o', linestyle='--')
plt.title('Cumulative Explained Variance by PCA Components')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
# Caption restating the ~86% cumulative variance reached at eight components
plt.figtext(0.5, -0.05, " Observe that, when we take only the first eight principal components we lose more than 10% of explained variance during dimensionality reduction.",
            ha="center", fontsize=10, wrap=True)
plt.grid(True)
plt.show()
# Component loadings: rows = principal components, columns = original features;
# each row gives the weights of the linear combination forming that component
component_labels = [f"Comp.{k}" for k in range(1, len(pc_df.columns) + 1)]
loadings_df = pd.DataFrame(pca.components_, columns=data.columns, index=component_labels)
loadings_df
| length of patch | width of patch | height of patch | height of substrate | height of solder resist layer | radius of the probe | c_pad | c_antipad | c_probe | dielectric constant of substrate | dielectric constant of solder resist layer | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Comp.1 | -0.101327 | 0.624178 | 0.071054 | 0.624137 | -0.021049 | 0.025997 | -0.054228 | -0.018720 | 0.039213 | 0.445100 | 0.038422 |
| Comp.2 | 0.315409 | -0.009718 | 0.055716 | 0.018824 | 0.266259 | 0.002815 | -0.265990 | -0.559499 | 0.564753 | -0.012829 | -0.350944 |
| Comp.3 | -0.369645 | 0.040766 | 0.620460 | 0.016441 | 0.086417 | 0.548381 | 0.144827 | 0.088323 | 0.093131 | -0.256167 | -0.255339 |
| Comp.4 | 0.376883 | 0.039631 | 0.392974 | 0.076518 | 0.510652 | -0.454397 | 0.177533 | 0.373926 | -0.151004 | -0.022492 | -0.184975 |
| Comp.5 | 0.409833 | 0.012169 | 0.017701 | 0.028012 | -0.124629 | 0.218986 | 0.580059 | 0.127759 | 0.487544 | 0.012254 | 0.420314 |
| Comp.6 | 0.073314 | -0.032326 | 0.069714 | -0.060565 | 0.459841 | 0.304908 | -0.547579 | 0.172140 | 0.016285 | 0.027772 | 0.591708 |
| Comp.7 | -0.253331 | 0.000413 | -0.490098 | 0.002837 | 0.647199 | 0.212779 | 0.429976 | -0.138146 | -0.102528 | 0.097290 | -0.086813 |
| Comp.8 | 0.454266 | 0.022090 | -0.317943 | 0.059481 | -0.110408 | 0.491478 | -0.174386 | 0.415883 | -0.129279 | 0.053226 | -0.460296 |
| Comp.9 | -0.406238 | -0.040157 | -0.216559 | -0.038186 | 0.040184 | -0.240280 | -0.140321 | 0.548589 | 0.618694 | 0.031927 | -0.145645 |
| Comp.10 | -0.036934 | -0.328516 | 0.243013 | -0.305362 | -0.027430 | 0.080764 | 0.049041 | -0.000574 | 0.000912 | 0.849121 | -0.087887 |
| Comp.11 | -0.036141 | -0.704172 | 0.006071 | 0.708004 | -0.004432 | 0.010532 | -0.010954 | -0.006460 | -0.008289 | -0.018678 | 0.028625 |
Recall the second principal component, for example:
Comp.2 = (length of patch × 0.315409) + (width of patch × -0.009718) + (height of patch × 0.055716) + (height of substrate × 0.018824) + (height of solder resist layer × 0.266259) + (radius of the probe × 0.002815) + (c_pad × -0.265990) + (c_antipad × -0.559499) + (c_probe × 0.564753) + (dielectric constant of substrate × -0.012829) + (dielectric constant of solder resist layer × -0.350944)
Principle Component Analysis on The Target Data¶
# Standardise the S11 magnitude targets, fit a full PCA, and inspect the
# cumulative explained-variance curve.
scaler1 = StandardScaler()
scaled_data_s11 = scaler1.fit_transform(S11_target)

# Fit PCA with all components kept (no dimensionality reduction yet).
pca = PCA()
pca.fit(scaled_data_s11)

# Cumulative share of variance explained as components are added.
cumulative_variance_s11 = pca.explained_variance_ratio_.cumsum()
plt.figure(figsize=(8, 6))
plt.plot(cumulative_variance_s11, label="S11_target_pca", c='g')
plt.title('Cumulative Explained Variance by PCA Components')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.grid(True)
plt.legend(loc="best", fontsize=10)
plt.show()
# Standardise the real-part targets and inspect the PCA variance profile.
# NOTE(review): real_S11[1::] drops the first row — presumably a header or
# frequency row; confirm against where real_S11 is loaded.
scaler2 = StandardScaler()
scaled_data_real = scaler2.fit_transform(real_S11[1::].reset_index(drop=True))

# Full PCA on the scaled real parts.
pca = PCA()
pca.fit(scaled_data_real)

# Cumulative explained-variance curve.
cumulative_variance_real = pca.explained_variance_ratio_.cumsum()
plt.figure(figsize=(8, 6))
plt.plot(cumulative_variance_real, label="real_pca", c='b')
plt.title('Cumulative Explained Variance by PCA Components')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.grid(True)
plt.legend(loc="best", fontsize=10)
plt.show()
# Standardise the imaginary-part targets and inspect the PCA variance profile.
# NOTE(review): imag_S11[1::] drops the first row — presumably a header or
# frequency row; confirm against where imag_S11 is loaded.
#Scale the data
scaler3 = StandardScaler()
scaled_data_imag = scaler3.fit_transform(imag_S11[1::].reset_index(drop=True))
#Apply PCA for the imaginary part
pca = PCA()
pca.fit(scaled_data_imag)
# Plot the explained variance ratio
plt.figure(figsize=(8, 6))
plt.plot(pca.explained_variance_ratio_.cumsum() , label="imag_pca", c='r')
plt.title('Cumulative Explained Variance by PCA Components')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.grid(True)
plt.legend(loc="best", fontsize=10)
plt.show()
# Tabulate per-component and cumulative explained variance of the most
# recently fitted PCA.
# NOTE(review): `pca` is the LAST model fitted above (the imaginary-part
# PCA), not the magnitude PCA — confirm this is the intended summary.
# Label components with a range over the number of fitted components, so the
# labels no longer depend on S11_target's column labels happening to be the
# integers 0..n-1 (the original `f"Comp.{i+1}" for i in S11_target.columns`
# breaks for string column labels).
pd.DataFrame(
    {
        "Explained Variance Ratio": pca.explained_variance_ratio_,
        "Cumulative Explained Variance Ratio": pca.explained_variance_ratio_.cumsum(),
    },
    index=[f"Comp.{i}" for i in range(1, len(pca.explained_variance_ratio_) + 1)],
)
| Explained Variance Ratio | Cumulative Explained Variance Ratio | |
|---|---|---|
| Comp.1 | 4.167438e-01 | 0.416744 |
| Comp.2 | 2.269942e-01 | 0.643738 |
| Comp.3 | 1.619304e-01 | 0.805668 |
| Comp.4 | 1.030973e-01 | 0.908766 |
| Comp.5 | 3.782842e-02 | 0.946594 |
| ... | ... | ... |
| Comp.197 | 1.933790e-33 | 1.000000 |
| Comp.198 | 1.801411e-33 | 1.000000 |
| Comp.199 | 1.772045e-33 | 1.000000 |
| Comp.200 | 1.561350e-33 | 1.000000 |
| Comp.201 | 1.053047e-33 | 1.000000 |
201 rows × 2 columns
scaled_data_real
array([[ 1.06574433, 1.01453916, 0.95853213, ..., -1.03117649,
-1.03066207, -1.03006856],
[-0.64499146, -0.6398629 , -0.63457663, ..., -0.9168636 ,
-0.91321435, -0.90942875],
[ 1.09232153, 1.10378008, 1.11527939, ..., 1.19098874,
1.14704934, 1.10111919],
...,
[-1.0315917 , -1.03139119, -1.03112695, ..., -0.92996344,
-0.9291446 , -0.92822175],
[-0.66015982, -0.65469567, -0.64906876, ..., -1.05988994,
-1.0601784 , -1.06021103],
[ 1.11677049, 1.1082833 , 1.1004233 , ..., 1.28610211,
1.29831994, 1.31073178]])
# Compare cumulative explained variance of 10-component PCA models fitted on
# the magnitude, real-part, and imaginary-part target matrices.
# Note: `fit` returns the estimator itself, so s11_target_pca/real_pca/imag_pca
# are aliases for pca1/pca2/pca3 (the fitted models reused in later cells).
plt.figure(figsize=(8, 6))
#Apply PCA with 10 components to the magnitude targets.
pca1 = PCA(10)
s11_target_pca= pca1.fit(scaled_data_s11)
# Plot the explained variance ratio
plt.plot(s11_target_pca.explained_variance_ratio_.cumsum(), marker='o', linestyle='--', label="S11_target_pca", c="g")
#Apply PCA with 10 components to the real parts.
pca2 = PCA(10)
real_pca=pca2.fit(scaled_data_real)
# Plot the explained variance ratio
plt.plot(real_pca.explained_variance_ratio_.cumsum(), marker='o', linestyle='--', label="real_pca",c="b")
#Apply PCA with 10 components to the imaginary parts.
pca3 = PCA(10)
imag_pca=pca3.fit(scaled_data_imag)
# Plot the explained variance ratio
plt.plot(imag_pca.explained_variance_ratio_.cumsum(), marker='o', linestyle='--', label="imag_pca",c="r")
plt.title('Cumulative Explained Variance by PCA Components')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.figtext(0.5, -0.05, " With only 10 components selected it almost covers all of the variance of the data.",
ha="center", fontsize=10, wrap=True)
plt.grid(True)
plt.legend(loc="best", fontsize=10)
plt.show()
# Project each target matrix onto its fitted principal components.
# pca1/pca2/pca3 were already fitted on these exact matrices in the previous
# cell, so transform() is used instead of fit_transform() — same projection,
# without the redundant refit.
S11_target_pca = pca1.transform(scaled_data_s11)
Real_pca = pca2.transform(scaled_data_real)
Imag_pca = pca3.transform(scaled_data_imag)
# Reconstruct approximations of the original (unscaled) data: invert the PCA
# projection back to scaled space, then invert the standardisation.
S11_reconstructed = scaler1.inverse_transform(pca1.inverse_transform(S11_target_pca))
real_reconstructed = scaler2.inverse_transform(pca2.inverse_transform(Real_pca))
imag_reconstructed = scaler3.inverse_transform(pca3.inverse_transform(Imag_pca))
# Visualise design index 2 rebuilt from the 10 retained principal components.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title('3th Design (Reconstructed From 10 Principle Components)')
ax.set_xlabel('Frequency Index')
ax.grid(True)
# Empty placeholder caption below the figure.
plt.figtext(0.5, -0.05, "",
            ha="center", fontsize=10, wrap=True)
# Annotate the minimum of the reconstructed magnitude (the resonance dip).
magnitude_curve = S11_reconstructed[2, :]
min_index = np.argmin(magnitude_curve)
min_value = np.min(magnitude_curve)
plt.text(min_index, min_value, f'x ({min_index}, {min_value:.5f})', fontsize=8, ha='left')
# Magnitude in a solid line; real/imaginary parts dotted.
plt.plot(S11_reconstructed[2, :], label="Magnitude", c='g', lw=2.5)
plt.plot(real_reconstructed[2, :], label="Real Part", c='r', ls=':', lw=1.5)
plt.plot(imag_reconstructed[2, :], label="Imaginary Part", c='b', ls=':', lw=1.5)
ax.legend(loc="best", fontsize=10)
<matplotlib.legend.Legend at 0x369261160>
# Compare low-dimensional PCA fits: the magnitude model keeps enough
# components for >= 90% explained variance (float n_components is a variance
# threshold in sklearn), while the real/imag models keep a fixed 4 components.
plt.figure(figsize=(8, 6))
#Apply PCA to get at least 90% explained variance.
pca1 = PCA(0.9)
s11_target_pca= pca1.fit(scaled_data_s11)
# Plot the explained variance ratio
plt.plot(s11_target_pca.explained_variance_ratio_.cumsum(), marker='o', linestyle='--', label="S11_target_pca", c="g")
#Apply PCA with 4 components to the real parts.
pca2 = PCA(4)
real_pca=pca2.fit(scaled_data_real)
# Plot the explained variance ratio
plt.plot(real_pca.explained_variance_ratio_.cumsum(), marker='o', linestyle='--', label="real_pca",c="b")
#Apply PCA with 4 components to the imaginary parts.
pca3 = PCA(4)
imag_pca=pca3.fit(scaled_data_imag)
# Plot the explained variance ratio
plt.plot(imag_pca.explained_variance_ratio_.cumsum(), marker='o', linestyle='--', label="imag_pca",c="r")
plt.title('Cumulative Explained Variance by PCA Components')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.figtext(0.5, -0.05, " With 4 components selected we lose less than 10% of explained variance during dimensionality reduction.",
ha="center", fontsize=10, wrap=True)
plt.grid(True)
plt.legend(loc="best", fontsize=10)
plt.show()
# Project each target matrix onto the reduced component sets fitted in the
# previous cell (variance-threshold PCA for magnitude, 4 components for
# real/imag). The models were fitted on these exact matrices, so transform()
# replaces fit_transform() to avoid a redundant refit.
S11_target_pca = pca1.transform(scaled_data_s11)
Real_pca = pca2.transform(scaled_data_real)
Imag_pca = pca3.transform(scaled_data_imag)
# Reconstruct approximations of the original (unscaled) data: invert the PCA
# projection, then invert the standardisation.
S11_reconstructed = scaler1.inverse_transform(pca1.inverse_transform(S11_target_pca))
real_reconstructed = scaler2.inverse_transform(pca2.inverse_transform(Real_pca))
imag_reconstructed = scaler3.inverse_transform(pca3.inverse_transform(Imag_pca))
# Visualise design index 2 rebuilt from the 4-component reductions.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title('3th Design (Reconstructed From 4 Principle Components)')
ax.set_xlabel('Frequency Index')
ax.grid(True)
# Empty placeholder caption below the figure.
plt.figtext(0.5, -0.05, "",
            ha="center", fontsize=10, wrap=True)
# Annotate the minimum of the reconstructed magnitude (the resonance dip).
magnitude_curve = S11_reconstructed[2, :]
min_index = np.argmin(magnitude_curve)
min_value = np.min(magnitude_curve)
plt.text(min_index, min_value, f'x ({min_index}, {min_value:.5f})', fontsize=8, ha='left')
# Magnitude in a solid line; real/imaginary parts dotted.
plt.plot(S11_reconstructed[2, :], label="Magnitude", c='g', lw=2.5)
plt.plot(real_reconstructed[2, :], label="Real Part", c='r', ls=':', lw=1.5)
plt.plot(imag_reconstructed[2, :], label="Imaginary Part", c='b', ls=':', lw=1.5)
ax.legend(loc="best", fontsize=10)
<matplotlib.legend.Legend at 0x367b28a90>
Regression¶
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, mean_squared_error, accuracy_score
1. Multitarget Regression¶
- S11 values are evaluated at 201 frequency points.
# Multitarget regression: predict |S11| at all 201 frequency points from the
# 11 geometric/material input features.
# Build the pipeline: standardise inputs, reduce to 8 components, then fit one
# random forest per output via MultiOutputRegressor.
pipe = Pipeline([
('scaler', StandardScaler()),
('reducer', PCA(n_components=8)),
('regressor', MultiOutputRegressor(RandomForestRegressor(random_state=101)))])
# Split the data and S11_target into training and testing data
X_train, X_test, y_train_201, y_test_201 = train_test_split(data, S11_target, test_size=0.3, random_state=101)
# Fit the pipeline to the training data
pipe.fit(X_train, y_train_201)
# Make predictions on the test set
y_pred_201 = pipe.predict(X_test)
# Calculate the mean squared error
MSE_1 = mean_squared_error(y_test_201, y_pred_201, multioutput='uniform_average')
# Print the R^2 score (sklearn's default regressor score, not accuracy) and the MSE
print(pipe.score(X_test, y_test_201))
print(MSE_1)
0.5594020626180238 0.019159774874724738
- S11 parameters, real and imaginary parts are evaluated separately at 201 frequency points.
# Predict the real and imaginary parts separately at all 201 frequency points,
# then combine them into a magnitude and score against the true magnitude.
# Build the pipeline
pipe = Pipeline([
('scaler', StandardScaler()),
('reducer', PCA(n_components=8)),
('regressor', MultiOutputRegressor(RandomForestRegressor(random_state=101)))])
# Split the data and S11 parameters into training and testing data, use the same model using two different target sets.
# The identical random_state keeps the real/imag row splits aligned.
X_train, X_test, y_train_201_real, y_test_201_real = train_test_split(data, real_S11[1::].reset_index(drop=True), test_size=0.3, random_state=101)
X_train, X_test, y_train_201_imag, y_test_201_imag = train_test_split(data, imag_S11[1::].reset_index(drop=True), test_size=0.3, random_state=101)
# Fit the pipeline to the training data and get predictions for both real and imaginary parameters.
pipe.fit(X_train, y_train_201_real)
real_201_predict= pipe.predict(X_test)
pipe.fit(X_train, y_train_201_imag)
imag_201_predict= pipe.predict(X_test)
# Calculate the S11 values: |S11| = sqrt(Re^2 + Im^2)
S11_magnitude_predict = np.sqrt(real_201_predict**2 + imag_201_predict**2)
S11_magnitude_test = np.sqrt(y_test_201_real**2 + y_test_201_imag**2)
# Score the mse on the test set
MSE_2 = mean_squared_error(S11_magnitude_test, S11_magnitude_predict)
# Prints the mse
print(MSE_2)
0.04225208448301995
0.0422520844830199
2. Multitarget (At Resonance Frequencies) Regression¶
- S11 values are evaluated at 3 key frequency points (0,100,200).
# Multitarget regression restricted to a few key frequency points.
# NOTE(review): `key_frequencies` is defined elsewhere in the notebook —
# per the markdown above, presumably [0, 100, 200]; confirm.
# Build the pipeline
pipe = Pipeline([
('scaler', StandardScaler()),
('reducer', PCA(n_components=8)),
('regressor', MultiOutputRegressor(RandomForestRegressor(random_state=101)))])
# Split the data and S11_target into training and testing data
X_train, X_test, y_train, y_test = train_test_split(data, S11_target.iloc[:, key_frequencies], test_size=0.3, random_state=101)
# Fit the pipeline to the training data
pipe.fit(X_train, y_train)
# Make predictions on the test set
y_pred = pipe.predict(X_test)
# Calculate the mean squared error
MSE_3 = mean_squared_error(y_test, y_pred, multioutput='uniform_average')
# Print the MSE
print(MSE_3)
0.018801862109574585
- S11 parameters, real and imaginary parts are evaluated separately at 3 key frequency points.
# Predict real and imaginary parts separately at the key frequency points,
# then combine into a magnitude for scoring.
# Build the pipeline
pipe = Pipeline([
('scaler', StandardScaler()),
('reducer', PCA(n_components=8)),
('regressor', MultiOutputRegressor(RandomForestRegressor(random_state=101)))])
# Split the data and S11 parameters into training and testing data, use the same model using two different target sets.
# The identical random_state keeps the real/imag row splits aligned.
X_train, X_test, y_train1, y_test1 = train_test_split(data, real_S11[1::].reset_index(drop=True).iloc[:, key_frequencies], test_size=0.3, random_state=101)
X_train, X_test, y_train2, y_test2 = train_test_split(data, imag_S11[1::].reset_index(drop=True).iloc[:, key_frequencies], test_size=0.3, random_state=101)
# Fit the pipeline to the training data and get predictions for both real and imaginary parameters.
pipe.fit(X_train, y_train1)
real_S11_predict= pipe.predict(X_test)
pipe.fit(X_train, y_train2)
imag_S11_predict= pipe.predict(X_test)
# Calculate the S11 values: |S11| = sqrt(Re^2 + Im^2)
S11_magnitude_predict = np.sqrt(real_S11_predict**2 + imag_S11_predict**2)
S11_magnitude_test = np.sqrt(y_test1**2 + y_test2**2)
# Score the mse on the test set
MSE_4 = mean_squared_error(S11_magnitude_test, S11_magnitude_predict)
# Prints the mse
print(MSE_4)
0.04507268081460516
3. Multitarget Regression (PCA Applied)¶
- S11 values are evaluated by 4 principal components.
# Regress the PCA coefficients of the magnitude targets (S11_target_pca,
# computed above) instead of the raw 201-point curves.
# RandomForestRegressor handles multi-output targets natively, so no
# MultiOutputRegressor wrapper is needed here.
# Build the pipeline
pipe = Pipeline([
('scaler', StandardScaler()),
('reducer', PCA(n_components=8)),
('regressor', RandomForestRegressor(random_state=101))])
# Split the data and S11_target into training and testing data
X_train, X_test, y_train_pca, y_test_pca = train_test_split(data, S11_target_pca, test_size=0.3, random_state=101)
# Fit the pipeline to the training data
pipe.fit(X_train, y_train_pca)
# Make predictions on the test set
y_pred_pca = pipe.predict(X_test)
# Calculate the mean squared error (in PCA-coefficient space, hence the
# larger scale than the magnitude-space MSEs above)
MSE_5 = mean_squared_error(y_test_pca, y_pred_pca)
# Print the MSE
print(MSE_5)
15.214632867210318
# Map predicted and held-out PCA coefficients back to magnitude space:
# undo the PCA projection first, then undo the standardisation.
pred_in_scaled_space = pca1.inverse_transform(y_pred_pca)
test_in_scaled_space = pca1.inverse_transform(y_test_pca)
S11_pred_reconstructed = scaler1.inverse_transform(pred_in_scaled_space)
S11_test_reconstructed = scaler1.inverse_transform(test_in_scaled_space)
# MSE between the two reconstructions (both limited to the retained components).
MSE_5_1 = mean_squared_error(S11_pred_reconstructed, S11_test_reconstructed)
print(MSE_5_1)
0.013517684318910475
# Compare the reconstructed prediction against the untouched full-resolution
# test magnitudes (not the PCA-reduced version).
MSE_5_2 = mean_squared_error(S11_pred_reconstructed, y_test_201)
print(MSE_5_2)
0.017027761884641314
# Plot one test design (row index 4 of the test split): the model's
# reconstructed prediction, the PCA-reduced ground truth, and the actual
# full-resolution curve, each annotated at its minimum (resonance dip).
fig, ax = plt.subplots(figsize=(10, 6))
# Customize the plot
ax.set_title('51th Design Prediction (Reconstructed From 4 Principle Components)')
ax.set_xlabel('Frequency Index')
ax.grid(True)
# Add a caption below the plot (empty placeholder)
plt.figtext(0.5, -0.05, "",
ha="center", fontsize=10, wrap=True)
# Annotate and draw the predicted curve
min_value = np.min(S11_pred_reconstructed[4, :])
min_index = np.argmin(S11_pred_reconstructed[4, :])
plt.text(min_index, min_value, f'x ({min_index}, {min_value:.5f})', fontsize=8, ha='left')
plt.plot(S11_pred_reconstructed[4, :], label="Prediction", c='b', lw=2)
# Annotate and draw the PCA-reduced test curve
min_value = np.min(S11_test_reconstructed[4, :])
min_index = np.argmin(S11_test_reconstructed[4, :])
plt.text(min_index, min_value, f'x ({min_index}, {min_value:.5f})', fontsize=8, ha='left')
plt.plot(S11_test_reconstructed[4, :], label="Test Data (Reduced Form)",c='g', ls=':', lw=1.5)
# Annotate and draw the actual (non-reduced) curve
min_value = np.min(y_test_201.iloc[4, :])
min_index = np.argmin(y_test_201.iloc[4, :])
plt.text(min_index, min_value, f'x ({min_index}, {min_value:.5f})', fontsize=8, ha='left')
plt.plot(y_test_201.iloc[4, :], label="Actual Data", c='r', ls=':', lw=1.5)
ax.legend(loc="best", fontsize=10)
<matplotlib.legend.Legend at 0x367232340>
- S11 parameters, real and imaginary parts are evaluated by 4 principal components.
# Regress the 4 PCA coefficients of the real and imaginary parts separately.
# RandomForestRegressor handles multi-output targets natively here.
# Build the pipeline
pipe = Pipeline([
('scaler', StandardScaler()),
('reducer', PCA(n_components=8)),
('regressor', RandomForestRegressor(random_state=101))])
# Split the data and the real/imag PCA coefficients into training and testing
# data; identical random_state keeps the two splits row-aligned.
X_train, X_test, y_train_pca_real, y_test_pca_real = train_test_split(data, Real_pca, test_size=0.3, random_state=101)
X_train, X_test, y_train_pca_imag, y_test_pca_imag = train_test_split(data, Imag_pca, test_size=0.3, random_state=101)
# Fit the pipeline to the training data
pipe.fit(X_train, y_train_pca_real)
# Make predictions on the test set
y_pred_pca_real = pipe.predict(X_test)
# Fit the pipeline to the training data
pipe.fit(X_train, y_train_pca_imag)
# Make predictions on the test set
y_pred_pca_imag = pipe.predict(X_test)
# Calculate the mean squared error (in PCA-coefficient space)
MSE_6 = mean_squared_error(y_test_pca_real, y_pred_pca_real, multioutput='uniform_average')
# Print the MSE for the real-part coefficients
print(MSE_6)
# Calculate the mean squared error
MSE_7 = mean_squared_error(y_test_pca_imag, y_pred_pca_imag, multioutput='uniform_average')
# Print the MSE for the imaginary-part coefficients
print(MSE_7)
8.7410756222609 36.02693734160932
# Recover real/imaginary curves in the original units from PCA coefficients.
def _reconstruct(fitted_scaler, fitted_pca, coefficients):
    # Invert the PCA projection, then invert the standardisation.
    return fitted_scaler.inverse_transform(fitted_pca.inverse_transform(coefficients))

real_pred_reconstructed = _reconstruct(scaler2, pca2, y_pred_pca_real)
real_test_reconstructed = _reconstruct(scaler2, pca2, y_test_pca_real)
imag_pred_reconstructed = _reconstruct(scaler3, pca3, y_pred_pca_imag)
imag_test_reconstructed = _reconstruct(scaler3, pca3, y_test_pca_imag)
# Overlay predicted, PCA-reduced test, and actual real/imag curves for one
# test design (row index 4). Colors distinguish the source (blue=prediction,
# green=reduced test, red=actual); markers distinguish real ('s') vs imag ('o').
fig, ax = plt.subplots(figsize=(10, 6))
# Customize the plot
ax.set_title('51th Design Prediction (Reconstructed From 4 Principle Components)')
ax.set_xlabel('Frequency Index')
ax.grid(True)
# Add a caption below the plot (empty placeholder)
plt.figtext(0.5, -0.05, "",
ha="center", fontsize=10, wrap=True)
plt.plot(real_pred_reconstructed[4, :], label="Real Part Prediction",marker='s',markevery=10, c='b', lw=2)
plt.plot(imag_pred_reconstructed[4, :], label="Imaginary Part Prediction",marker='o',markevery=10, c='b',lw=2)
plt.plot(real_test_reconstructed[4, :], label="Real Part (Test)",marker='s',markevery=10,markersize=5, c='g',ls=':', lw=1.5)
plt.plot(imag_test_reconstructed[4, :], label="Imaginary Part (Test)",marker='o',markevery=10,markersize=5, c='g',ls=':',lw=1.5)
plt.plot(y_test_201_real.iloc[4, :], label="Real Part (Actual)",marker='s',markevery=10,markersize=5, c='r',ls=':', lw=1.5)
plt.plot(y_test_201_imag.iloc[4, :], label="Imaginary Part (Actual)",marker='o',markevery=10,markersize=5, c='r',ls=':',lw=1.5)
ax.legend(loc="best", fontsize=10)
<matplotlib.legend.Legend at 0x36727ff70>
# |S11| = sqrt(Re^2 + Im^2), computed for the predictions, for the
# PCA-reduced test curves, and for the actual full-resolution test curves.
S11_magnitude_predict = np.sqrt(real_pred_reconstructed**2 + imag_pred_reconstructed**2)
S11_magnitude_test = np.sqrt(real_test_reconstructed**2 + imag_test_reconstructed**2)
S11_magnitude = np.sqrt(y_test_201_real**2 + y_test_201_imag**2)
# Error of the predicted magnitude against the PCA-reduced test magnitude.
MSE_7_1 = mean_squared_error(S11_magnitude_test, S11_magnitude_predict)
print(MSE_7_1)
0.044323588402768226
# Error of the predicted magnitude against the actual (non-reduced) magnitude.
MSE_7_2 = mean_squared_error(S11_magnitude, S11_magnitude_predict)
print(MSE_7_2)
0.035406445184256806
# Plot the magnitudes derived from the separate real/imag models for one test
# design (row index 4): prediction, PCA-reduced test, and actual, each
# annotated at its minimum (resonance dip).
fig, ax = plt.subplots(figsize=(10, 6))
# Customize the plot
ax.set_title('51th Design Prediction (Reconstructed From 4 Principle Components)')
ax.set_xlabel('Frequency Index')
ax.grid(True)
# Add a caption below the plot (empty placeholder)
plt.figtext(0.5, -0.05, "",
ha="center", fontsize=10, wrap=True)
# Annotate and draw the predicted magnitude
min_value = np.min(S11_magnitude_predict[4, :])
min_index = np.argmin(S11_magnitude_predict[4, :])
plt.text(min_index, min_value, f'x ({min_index}, {min_value:.5f})', fontsize=8, ha='left')
plt.plot(S11_magnitude_predict[4, :], label="S11 Prediction", c='b', lw=2)
# Annotate and draw the PCA-reduced test magnitude
min_value = np.min(S11_magnitude_test[4, :])
min_index = np.argmin(S11_magnitude_test[4, :])
plt.text(min_index, min_value, f'x ({min_index}, {min_value:.5f})', fontsize=8, ha='left')
plt.plot(S11_magnitude_test[4, :], label="S11 Test", c='g', ls=':', lw=1.5)
# Annotate and draw the actual magnitude
min_value = np.min(S11_magnitude.iloc[4, :])
min_index = np.argmin(S11_magnitude.iloc[4, :])
plt.text(min_index, min_value, f'x ({min_index}, {min_value:.5f})', fontsize=8, ha='left')
plt.plot(S11_magnitude.iloc[4, :], label="S11 Actual Magnitude", c='r', ls=':', lw=1.5)
ax.legend(loc="best", fontsize=10)
<matplotlib.legend.Legend at 0x361db3ee0>